Hackathon on Electric Vehicle Data Analysis¶

Name - Shreyanshu Batchno- 197

In [1]:
import pandas as pd,numpy as np
In [2]:
df = pd.read_csv(r"C:\Users\sreya\Downloads\dataset.csv")
In [3]:
df
Out[3]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 JTMEB3FV6N Monroe Key West FL 33040 2022 TOYOTA RAV4 PRIME Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 42 0 NaN 198968248 POINT (-81.80023 24.5545) NaN 12087972100
1 1G1RD6E45D Clark Laughlin NV 89029 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 NaN 5204412 POINT (-114.57245 35.16815) NaN 32003005702
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
112629 7SAYGDEF2N King Duvall WA 98019 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 45.0 217955265 POINT (-121.98609 47.74068) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032401
112630 1N4BZ1CP7K San Juan Friday Harbor WA 98250 2019 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 150 0 40.0 103663227 POINT (-123.01648 48.53448) BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... 53055960301
112631 1FMCU0KZ4N King Vashon WA 98070 2022 FORD ESCAPE Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 34.0 193878387 POINT (-122.4573 47.44929) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033027702
112632 KNDCD3LD4J King Covington WA 98042 2018 KIA NIRO Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 47.0 125039043 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032007
112633 YV4BR0CL8N King Covington WA 98042 2022 VOLVO XC90 Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 18 0 47.0 194673692 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032005

112634 rows × 17 columns

In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB
In [5]:
df.isnull().sum()
Out[5]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64
  • As we can see we have few columns having null values we have to fill them.
In [7]:
# Filling the nullvalues 
df["Model"]=df.groupby(["Make"])['Model'].transform(lambda x: x.fillna(x.mode()[0]))
df["Legislative District"]=df.groupby(["County","City","State","Postal Code"])["Legislative District"].transform(lambda x: x.fillna(x.mode()[0]))
df["Electric Utility"]=df.groupby(["Make","Model","Electric Vehicle Type","Clean Alternative Fuel Vehicle (CAFV) Eligibility"])["Electric Utility"].transform(lambda x: x.fillna(x.mode()[0]))
  • We have 24 values in Vehicle Location which is 0.021% percentage of data.
  • so the missing values can't effect the data
  • So we are trimming the missing value rows
In [8]:
df.dropna(subset=['Vehicle Location'], inplace=True)
df.reset_index(drop=True,inplace=True)
df
Out[8]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 JTMEB3FV6N Monroe Key West FL 33040 2022 TOYOTA RAV4 PRIME Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 42 0 NaN 198968248 POINT (-81.80023 24.5545) CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA) 12087972100
1 1G1RD6E45D Clark Laughlin NV 89029 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 NaN 5204412 POINT (-114.57245 35.16815) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 32003005702
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
112605 7SAYGDEF2N King Duvall WA 98019 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 45.0 217955265 POINT (-121.98609 47.74068) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032401
112606 1N4BZ1CP7K San Juan Friday Harbor WA 98250 2019 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 150 0 40.0 103663227 POINT (-123.01648 48.53448) BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... 53055960301
112607 1FMCU0KZ4N King Vashon WA 98070 2022 FORD ESCAPE Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 34.0 193878387 POINT (-122.4573 47.44929) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033027702
112608 KNDCD3LD4J King Covington WA 98042 2018 KIA NIRO Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 47.0 125039043 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032007
112609 YV4BR0CL8N King Covington WA 98042 2022 VOLVO XC90 Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 18 0 47.0 194673692 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032005

112610 rows × 17 columns

In [9]:
df.isnull().sum()
Out[9]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                  0
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 281
DOL Vehicle ID                                         0
Vehicle Location                                       0
Electric Utility                                       0
2020 Census Tract                                      0
dtype: int64
  • Now we can see all the null values are filled.
In [10]:
df[df.duplicated()]
Out[10]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
  • we don't have any duplicate value
In [11]:
df["Postal Code"].value_counts()
Out[11]:
98052    2916
98033    2059
98004    2001
98115    1880
98006    1852
         ... 
98305       1
21701       1
98621       1
84128       1
83876       1
Name: Postal Code, Length: 758, dtype: int64
In [12]:
df["Model Year"].value_counts()
Out[12]:
2022    26529
2021    18361
2018    14245
2020    11037
2019    10265
2017     8642
2016     5732
2015     4940
2013     4686
2014     3683
2023     1886
2012     1702
2011      838
2010       24
2008       23
2000       10
1999        3
2002        2
1997        1
1998        1
Name: Model Year, dtype: int64

From above we can see that Postal Code,Model Year and Legislative District are in numerical but they are not the continuous data

  • So we are changing the Data type of the Features
In [13]:
df["Legislative District"]=df["Legislative District"].astype("str")
df["Postal Code"]=df["Postal Code"].astype("str")
df["Model Year"]=df["Model Year"].astype("str")
In [ ]:
 

Creating plots using plotly library¶

In [14]:
import plotly.express as px
In [15]:
area=df["Model Year"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
          x=area.index,
          orientation="v",
           color=area.index,
           
           text=area.values,
          color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
  • As we can see model year 2022 have the highest electrical cars.
In [16]:
area=df["Make"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
          x=area.index,
          orientation="v",
           color=area.index,
           
           text=area.values,
          color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
  • Tesla Brand electrical cars are much heavy compare to other brands cars
In [17]:
Vechile_type = df['Electric Vehicle Type'].value_counts()
fig = px.pie(Vechile_type, names=Vechile_type.index, values=Vechile_type.values, title='Pie Chart of Electric Vehicle Type')
fig.show()
  • From above Graph we can see that Battery Electric Vehicle (BEV) are very high compare to Plug-in Hybrid Electric Vehicle (PHEV)
In [18]:
fig=px.histogram(df,x="Electric Range",nbins=10)
fig.show()
In [19]:
fig=px.box(df,x="Electric Range")
fig.show()
In [20]:
fig_boxplot = px.box(df, x='Electric Vehicle Type', y='Electric Range', title='Box Plot of Electric Vehicle Type vs. Electric Range')
fig_boxplot.show()
  • we can see there is no any outliers in Electric Range. but when we ploted above we can see that when we compared Electric Range with Electric Vechile Type.we can see outliers in Plug-in Hybrid Electric Vehicle (PHEV).
In [ ]:
 

Creating a Choropleth to display the number of EV vehicles based on States in USA¶

In [21]:
import plotly.graph_objects as go
def create_ev_choropleth_map(df):
    # Calculate the count of EV vehicles for each state
    ev_count_by_state = df['State'].value_counts().reset_index()
    ev_count_by_state.columns = ['State', 'EV Count']

    # Create the Choropleth map using plotly.graph_objects
    fig_choropleth = go.Figure(data=go.Choropleth(
        locations=ev_count_by_state['State'],
        z=ev_count_by_state['EV Count'],
        locationmode='USA-states',
        colorscale='Viridis',
        colorbar_title='Number of EV Vehicles',
    ))

    # Set the map title and layout
    fig_choropleth.update_layout(
        title_text='Choropleth Map of EV Vehicles by State',
        geo_scope='usa',  # Limit map scope to the USA
    )

    # Return the Choropleth map figure
    return fig_choropleth
# Call the function and display the Choropleth map
fig = create_ev_choropleth_map(df)
fig.show()

Create a Racing Bar Plot to display the animation of EV Make and its count each year¶

In [22]:
import pandas as pd

# Converting the 'Model Year' column to datetime type
df['Model Year'] = pd.to_datetime(df['Model Year'], format='%Y')

# Group by 'Model Year' and 'Make' to get the count of each make for each year
df_grouped = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
In [23]:
import bar_chart_race as bcr
# Pivot the DataFrame to have 'Make' as columns and 'Model Year' as index
df_pivot = df_grouped.pivot(index='Model Year', columns='Make', values='Count')

# Fill missing values using forward fill (pad)
df_pivot = df_pivot.fillna(method='pad')


# Create the Racing Bar Plot
bcr.bar_chart_race(
    df=df_pivot,
    filename='ev_make_racing_bar_plot.mp4',  # Output file name for the animation
    orientation='h',  # Horizontal bars
    sort='desc',      # Sort the bars in descending order
    n_bars=10,        # Number of bars to show
    fixed_order=False, # Allow bars to change positions
    title='EV Make Racing Bar Plot by Year',  # Animation title
    label_bars=True,  # Show the value label on each bar
    period_label={'x': 0.99, 'y': 0.25, 'ha': 'right', 'va': 'center'},  # Position of the year label
    period_fmt='%Y',  # Format of the year label
)
In [ ]:
 
In [24]:
fig_bar = px.histogram(df,x ='Make',y='Model Year',
                 animation_frame="Make", animation_group="Model Year", 
               
                 color_discrete_sequence=px.colors.qualitative.T10)
fig_bar.update_yaxes(showgrid=False),
fig_bar.update_xaxes(categoryorder='total descending')
fig_bar.update_traces(hovertemplate=None)
fig_bar.update_layout(margin=dict(t=70, b=0, l=70, r=40),
                        hovermode="x unified",
                        xaxis_tickangle=360,
                        xaxis_title=' ', yaxis_title=" ",
                        plot_bgcolor='#2d3035', paper_bgcolor='#2d3035',
                        title_font=dict(size=25, color='#a5a7ab', family="Lato, sans-serif"),
                        font=dict(color='#8a8d93'),
                        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
                     )
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: